# Crawl yearly movie box-office figures.
import sys
# Extend the import path with a site-packages directory; the path is relative,
# so it is resolved against the current working directory
# (presumably the project's virtualenv -- confirm).
sys.path.append('../venv/Lib/site-packages')
import requests
from bs4 import BeautifulSoup
from time import sleep

# URL prefix of the per-year pages; getData appends the year to it.
base_url = 'http://www.boxofficecn.com/boxoffice'

# Put the parent of this file's directory on sys.path so that the
# `database` package (used for the database connection) can be imported.
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from database import connect

# Fetch the box-office rows for a single year.
def getData(year: int) -> list[dict]:
    """Scrape one year's box-office page and return its parsable rows.

    Requests ``base_url + str(year)``, parses the response body with the
    ``lxml`` parser and reads every ``<tr>`` element after the first one.

    Args:
        year: Year appended to ``base_url`` to build the page URL.

    Returns:
        A list of dicts with the keys ``'year'`` (int), ``'name'`` (str) and
        ``'boxoffice'`` (float), one per row that could be parsed. Rows with
        fewer than four ``<td>`` cells, or whose year / box-office text is
        not numeric, are skipped.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # A timeout keeps a stalled server from hanging the whole crawl.
    response = requests.get(base_url + str(year), timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')
    rows = soup.select("tr")
    records = []
    # The first <tr> is skipped (presumably the table header -- confirm).
    for row in rows[1:]:
        cells = row.select('td')
        try:
            record = {
                'year': int(cells[1].text),
                'name': cells[2].text,
                'boxoffice': float(cells[3].text),
            }
        except (IndexError, ValueError):
            # Too few cells or non-numeric text: not a data row, skip it.
            # Only these two are caught so KeyboardInterrupt and genuine
            # bugs are no longer silently swallowed.
            continue
        records.append(record)
    return records


# Fetch and store the data for every year from start to end (inclusive).
def getBoxOfficeData(start: int, end: int):
    """Crawl each year in ``start..end`` (inclusive) and save the results.

    For every year this waits five seconds, fetches the rows with
    ``getData``, prints them, and passes them to ``connect.saveRankYear``
    together with the year.

    Args:
        start: First year to crawl.
        end: Last year to crawl (included).
    """
    last_year_exclusive = end + 1
    for year in range(start, last_year_exclusive):
        # Pause before each request to space out hits on the site.
        sleep(5)
        yearly_rows = getData(year)
        print(yearly_rows)
        connect.saveRankYear(year, yearly_rows)


# Script entry: crawl and store the movie data for 2018 through 2022.
getBoxOfficeData(start=2018, end=2022)
